{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\masaikk\\anaconda3\\envs\\mytorch\\lib\\site-packages\\gym\\envs\\classic_control\\cartpole.py:211: UserWarning: \u001B[33mWARN: You are calling render method without specifying any render mode. You can specify the render_mode at initialization, e.g. gym(\"CartPole-v0\", render_mode=\"rgb_array\")\u001B[0m\n",
      "  gym.logger.warn(\n"
     ]
    },
    {
     "ename": "TypeError",
     "evalue": "Image data of dtype object cannot be converted to float",
     "output_type": "error",
     "traceback": [
      "\u001B[1;31m---------------------------------------------------------------------------\u001B[0m",
      "\u001B[1;31mTypeError\u001B[0m                                 Traceback (most recent call last)",
      "Input \u001B[1;32mIn [2]\u001B[0m, in \u001B[0;36m<cell line: 16>\u001B[1;34m()\u001B[0m\n\u001B[0;32m     12\u001B[0m     plt\u001B[38;5;241m.\u001B[39mimshow(env\u001B[38;5;241m.\u001B[39mrender())\n\u001B[0;32m     13\u001B[0m     plt\u001B[38;5;241m.\u001B[39mshow()\n\u001B[1;32m---> 16\u001B[0m \u001B[43mshow\u001B[49m\u001B[43m(\u001B[49m\u001B[43m)\u001B[49m\n",
      "Input \u001B[1;32mIn [2]\u001B[0m, in \u001B[0;36mshow\u001B[1;34m()\u001B[0m\n\u001B[0;32m     11\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21mshow\u001B[39m():\n\u001B[1;32m---> 12\u001B[0m     \u001B[43mplt\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mimshow\u001B[49m\u001B[43m(\u001B[49m\u001B[43menv\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mrender\u001B[49m\u001B[43m(\u001B[49m\u001B[43m)\u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m     13\u001B[0m     plt\u001B[38;5;241m.\u001B[39mshow()\n",
      "File \u001B[1;32m~\\anaconda3\\envs\\mytorch\\lib\\site-packages\\matplotlib\\_api\\deprecation.py:454\u001B[0m, in \u001B[0;36mmake_keyword_only.<locals>.wrapper\u001B[1;34m(*args, **kwargs)\u001B[0m\n\u001B[0;32m    448\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mlen\u001B[39m(args) \u001B[38;5;241m>\u001B[39m name_idx:\n\u001B[0;32m    449\u001B[0m     warn_deprecated(\n\u001B[0;32m    450\u001B[0m         since, message\u001B[38;5;241m=\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mPassing the \u001B[39m\u001B[38;5;132;01m%(name)s\u001B[39;00m\u001B[38;5;124m \u001B[39m\u001B[38;5;132;01m%(obj_type)s\u001B[39;00m\u001B[38;5;124m \u001B[39m\u001B[38;5;124m\"\u001B[39m\n\u001B[0;32m    451\u001B[0m         \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mpositionally is deprecated since Matplotlib \u001B[39m\u001B[38;5;132;01m%(since)s\u001B[39;00m\u001B[38;5;124m; the \u001B[39m\u001B[38;5;124m\"\u001B[39m\n\u001B[0;32m    452\u001B[0m         \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mparameter will become keyword-only \u001B[39m\u001B[38;5;132;01m%(removal)s\u001B[39;00m\u001B[38;5;124m.\u001B[39m\u001B[38;5;124m\"\u001B[39m,\n\u001B[0;32m    453\u001B[0m         name\u001B[38;5;241m=\u001B[39mname, obj_type\u001B[38;5;241m=\u001B[39m\u001B[38;5;124mf\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mparameter of \u001B[39m\u001B[38;5;132;01m{\u001B[39;00mfunc\u001B[38;5;241m.\u001B[39m\u001B[38;5;18m__name__\u001B[39m\u001B[38;5;132;01m}\u001B[39;00m\u001B[38;5;124m()\u001B[39m\u001B[38;5;124m\"\u001B[39m)\n\u001B[1;32m--> 454\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[43mfunc\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43margs\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43mkwargs\u001B[49m\u001B[43m)\u001B[49m\n",
      "File \u001B[1;32m~\\anaconda3\\envs\\mytorch\\lib\\site-packages\\matplotlib\\pyplot.py:2623\u001B[0m, in \u001B[0;36mimshow\u001B[1;34m(X, cmap, norm, aspect, interpolation, alpha, vmin, vmax, origin, extent, interpolation_stage, filternorm, filterrad, resample, url, data, **kwargs)\u001B[0m\n\u001B[0;32m   2617\u001B[0m \u001B[38;5;129m@_copy_docstring_and_deprecators\u001B[39m(Axes\u001B[38;5;241m.\u001B[39mimshow)\n\u001B[0;32m   2618\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21mimshow\u001B[39m(\n\u001B[0;32m   2619\u001B[0m         X, cmap\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mNone\u001B[39;00m, norm\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mNone\u001B[39;00m, aspect\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mNone\u001B[39;00m, interpolation\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mNone\u001B[39;00m,\n\u001B[0;32m   2620\u001B[0m         alpha\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mNone\u001B[39;00m, vmin\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mNone\u001B[39;00m, vmax\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mNone\u001B[39;00m, origin\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mNone\u001B[39;00m, extent\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mNone\u001B[39;00m, \u001B[38;5;241m*\u001B[39m,\n\u001B[0;32m   2621\u001B[0m         interpolation_stage\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mNone\u001B[39;00m, filternorm\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mTrue\u001B[39;00m, filterrad\u001B[38;5;241m=\u001B[39m\u001B[38;5;241m4.0\u001B[39m,\n\u001B[0;32m   2622\u001B[0m         resample\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mNone\u001B[39;00m, url\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mNone\u001B[39;00m, data\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mNone\u001B[39;00m, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs):\n\u001B[1;32m-> 2623\u001B[0m     __ret \u001B[38;5;241m=\u001B[39m \u001B[43mgca\u001B[49m\u001B[43m(\u001B[49m\u001B[43m)\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mimshow\u001B[49m\u001B[43m(\u001B[49m\n\u001B[0;32m   2624\u001B[0m \u001B[43m        \u001B[49m\u001B[43mX\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mcmap\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mcmap\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mnorm\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mnorm\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43maspect\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43maspect\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m   2625\u001B[0m \u001B[43m        \u001B[49m\u001B[43minterpolation\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43minterpolation\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43malpha\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43malpha\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mvmin\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mvmin\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m   2626\u001B[0m \u001B[43m        \u001B[49m\u001B[43mvmax\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mvmax\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43morigin\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43morigin\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mextent\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mextent\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m   2627\u001B[0m \u001B[43m        \u001B[49m\u001B[43minterpolation_stage\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43minterpolation_stage\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m   2628\u001B[0m \u001B[43m        \u001B[49m\u001B[43mfilternorm\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mfilternorm\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mfilterrad\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mfilterrad\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mresample\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mresample\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m   2629\u001B[0m \u001B[43m        \u001B[49m\u001B[43murl\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43murl\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43m(\u001B[49m\u001B[43m{\u001B[49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[38;5;124;43mdata\u001B[39;49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[43m:\u001B[49m\u001B[43m \u001B[49m\u001B[43mdata\u001B[49m\u001B[43m}\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;28;43;01mif\u001B[39;49;00m\u001B[43m \u001B[49m\u001B[43mdata\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;129;43;01mis\u001B[39;49;00m\u001B[43m \u001B[49m\u001B[38;5;129;43;01mnot\u001B[39;49;00m\u001B[43m \u001B[49m\u001B[38;5;28;43;01mNone\u001B[39;49;00m\u001B[43m \u001B[49m\u001B[38;5;28;43;01melse\u001B[39;49;00m\u001B[43m \u001B[49m\u001B[43m{\u001B[49m\u001B[43m}\u001B[49m\u001B[43m)\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m   2630\u001B[0m \u001B[43m        \u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43mkwargs\u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m   2631\u001B[0m     sci(__ret)\n\u001B[0;32m   2632\u001B[0m     \u001B[38;5;28;01mreturn\u001B[39;00m __ret\n",
      "File \u001B[1;32m~\\anaconda3\\envs\\mytorch\\lib\\site-packages\\matplotlib\\_api\\deprecation.py:454\u001B[0m, in \u001B[0;36mmake_keyword_only.<locals>.wrapper\u001B[1;34m(*args, **kwargs)\u001B[0m\n\u001B[0;32m    448\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mlen\u001B[39m(args) \u001B[38;5;241m>\u001B[39m name_idx:\n\u001B[0;32m    449\u001B[0m     warn_deprecated(\n\u001B[0;32m    450\u001B[0m         since, message\u001B[38;5;241m=\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mPassing the \u001B[39m\u001B[38;5;132;01m%(name)s\u001B[39;00m\u001B[38;5;124m \u001B[39m\u001B[38;5;132;01m%(obj_type)s\u001B[39;00m\u001B[38;5;124m \u001B[39m\u001B[38;5;124m\"\u001B[39m\n\u001B[0;32m    451\u001B[0m         \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mpositionally is deprecated since Matplotlib \u001B[39m\u001B[38;5;132;01m%(since)s\u001B[39;00m\u001B[38;5;124m; the \u001B[39m\u001B[38;5;124m\"\u001B[39m\n\u001B[0;32m    452\u001B[0m         \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mparameter will become keyword-only \u001B[39m\u001B[38;5;132;01m%(removal)s\u001B[39;00m\u001B[38;5;124m.\u001B[39m\u001B[38;5;124m\"\u001B[39m,\n\u001B[0;32m    453\u001B[0m         name\u001B[38;5;241m=\u001B[39mname, obj_type\u001B[38;5;241m=\u001B[39m\u001B[38;5;124mf\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mparameter of \u001B[39m\u001B[38;5;132;01m{\u001B[39;00mfunc\u001B[38;5;241m.\u001B[39m\u001B[38;5;18m__name__\u001B[39m\u001B[38;5;132;01m}\u001B[39;00m\u001B[38;5;124m()\u001B[39m\u001B[38;5;124m\"\u001B[39m)\n\u001B[1;32m--> 454\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[43mfunc\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43margs\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43mkwargs\u001B[49m\u001B[43m)\u001B[49m\n",
      "File \u001B[1;32m~\\anaconda3\\envs\\mytorch\\lib\\site-packages\\matplotlib\\__init__.py:1423\u001B[0m, in \u001B[0;36m_preprocess_data.<locals>.inner\u001B[1;34m(ax, data, *args, **kwargs)\u001B[0m\n\u001B[0;32m   1420\u001B[0m \u001B[38;5;129m@functools\u001B[39m\u001B[38;5;241m.\u001B[39mwraps(func)\n\u001B[0;32m   1421\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21minner\u001B[39m(ax, \u001B[38;5;241m*\u001B[39margs, data\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mNone\u001B[39;00m, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs):\n\u001B[0;32m   1422\u001B[0m     \u001B[38;5;28;01mif\u001B[39;00m data \u001B[38;5;129;01mis\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m:\n\u001B[1;32m-> 1423\u001B[0m         \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[43mfunc\u001B[49m\u001B[43m(\u001B[49m\u001B[43max\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[38;5;28;43mmap\u001B[39;49m\u001B[43m(\u001B[49m\u001B[43msanitize_sequence\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43margs\u001B[49m\u001B[43m)\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43mkwargs\u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m   1425\u001B[0m     bound \u001B[38;5;241m=\u001B[39m new_sig\u001B[38;5;241m.\u001B[39mbind(ax, \u001B[38;5;241m*\u001B[39margs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs)\n\u001B[0;32m   1426\u001B[0m     auto_label \u001B[38;5;241m=\u001B[39m (bound\u001B[38;5;241m.\u001B[39marguments\u001B[38;5;241m.\u001B[39mget(label_namer)\n\u001B[0;32m   1427\u001B[0m                   \u001B[38;5;129;01mor\u001B[39;00m bound\u001B[38;5;241m.\u001B[39mkwargs\u001B[38;5;241m.\u001B[39mget(label_namer))\n",
      "File \u001B[1;32m~\\anaconda3\\envs\\mytorch\\lib\\site-packages\\matplotlib\\axes\\_axes.py:5604\u001B[0m, in \u001B[0;36mAxes.imshow\u001B[1;34m(self, X, cmap, norm, aspect, interpolation, alpha, vmin, vmax, origin, extent, interpolation_stage, filternorm, filterrad, resample, url, **kwargs)\u001B[0m\n\u001B[0;32m   5596\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mset_aspect(aspect)\n\u001B[0;32m   5597\u001B[0m im \u001B[38;5;241m=\u001B[39m mimage\u001B[38;5;241m.\u001B[39mAxesImage(\u001B[38;5;28mself\u001B[39m, cmap\u001B[38;5;241m=\u001B[39mcmap, norm\u001B[38;5;241m=\u001B[39mnorm,\n\u001B[0;32m   5598\u001B[0m                       interpolation\u001B[38;5;241m=\u001B[39minterpolation, origin\u001B[38;5;241m=\u001B[39morigin,\n\u001B[0;32m   5599\u001B[0m                       extent\u001B[38;5;241m=\u001B[39mextent, filternorm\u001B[38;5;241m=\u001B[39mfilternorm,\n\u001B[0;32m   5600\u001B[0m                       filterrad\u001B[38;5;241m=\u001B[39mfilterrad, resample\u001B[38;5;241m=\u001B[39mresample,\n\u001B[0;32m   5601\u001B[0m                       interpolation_stage\u001B[38;5;241m=\u001B[39minterpolation_stage,\n\u001B[0;32m   5602\u001B[0m                       \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs)\n\u001B[1;32m-> 5604\u001B[0m \u001B[43mim\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mset_data\u001B[49m\u001B[43m(\u001B[49m\u001B[43mX\u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m   5605\u001B[0m im\u001B[38;5;241m.\u001B[39mset_alpha(alpha)\n\u001B[0;32m   5606\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m im\u001B[38;5;241m.\u001B[39mget_clip_path() \u001B[38;5;129;01mis\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m:\n\u001B[0;32m   5607\u001B[0m     \u001B[38;5;66;03m# image does not already have clipping set, clip to axes patch\u001B[39;00m\n",
      "File \u001B[1;32m~\\anaconda3\\envs\\mytorch\\lib\\site-packages\\matplotlib\\image.py:701\u001B[0m, in \u001B[0;36m_ImageBase.set_data\u001B[1;34m(self, A)\u001B[0m\n\u001B[0;32m    697\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_A \u001B[38;5;241m=\u001B[39m cbook\u001B[38;5;241m.\u001B[39msafe_masked_invalid(A, copy\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mTrue\u001B[39;00m)\n\u001B[0;32m    699\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m (\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_A\u001B[38;5;241m.\u001B[39mdtype \u001B[38;5;241m!=\u001B[39m np\u001B[38;5;241m.\u001B[39muint8 \u001B[38;5;129;01mand\u001B[39;00m\n\u001B[0;32m    700\u001B[0m         \u001B[38;5;129;01mnot\u001B[39;00m np\u001B[38;5;241m.\u001B[39mcan_cast(\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_A\u001B[38;5;241m.\u001B[39mdtype, \u001B[38;5;28mfloat\u001B[39m, \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124msame_kind\u001B[39m\u001B[38;5;124m\"\u001B[39m)):\n\u001B[1;32m--> 701\u001B[0m     \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mTypeError\u001B[39;00m(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mImage data of dtype \u001B[39m\u001B[38;5;132;01m{}\u001B[39;00m\u001B[38;5;124m cannot be converted to \u001B[39m\u001B[38;5;124m\"\u001B[39m\n\u001B[0;32m    702\u001B[0m                     \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mfloat\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;241m.\u001B[39mformat(\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_A\u001B[38;5;241m.\u001B[39mdtype))\n\u001B[0;32m    704\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_A\u001B[38;5;241m.\u001B[39mndim \u001B[38;5;241m==\u001B[39m \u001B[38;5;241m3\u001B[39m \u001B[38;5;129;01mand\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_A\u001B[38;5;241m.\u001B[39mshape[\u001B[38;5;241m-\u001B[39m\u001B[38;5;241m1\u001B[39m] \u001B[38;5;241m==\u001B[39m \u001B[38;5;241m1\u001B[39m:\n\u001B[0;32m    705\u001B[0m     \u001B[38;5;66;03m# If just one dimension assume scalar and apply colormap\u001B[39;00m\n\u001B[0;32m    706\u001B[0m     \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_A \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_A[:, :, \u001B[38;5;241m0\u001B[39m]\n",
      "\u001B[1;31mTypeError\u001B[0m: Image data of dtype object cannot be converted to float"
     ]
    },
    {
     "data": {
      "text/plain": "<Figure size 640x480 with 1 Axes>",
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAbAAAAGiCAYAAACGUJO6AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8o6BhiAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAbB0lEQVR4nO3df0zd1f3H8RfQcqmx0DrGhbKrrHX+tqWCZVgb53IniQbXPxaZNYURf0xlRnuz2WJbUKulq7Yjs2hj1ekfOqpGjbEEp0xiVJZGWhKdbU2lFWa8tyWu3I4qtNzz/WPfXocFywf50bc8H8nnD84+537OPWH36b2995LgnHMCAMCYxIleAAAAI0HAAAAmETAAgEkEDABgEgEDAJhEwAAAJhEwAIBJBAwAYBIBAwCYRMAAACZ5Dtjbb7+t4uJizZo1SwkJCXrllVdOOqe5uVmXXHKJfD6fzj77bD399NMjWCoAAF/zHLCenh7NmzdPdXV1wzp/3759uuaaa3TllVeqra1Nd911l2666Sa9/vrrnhcLAMBxCd/ly3wTEhL08ssva/HixUOes3z5cm3btk0ffvhhfOzXv/61Dh06pMbGxpFeGgAwyU0Z6wu0tLQoGAwOGCsqKtJdd9015Jze3l719vbGf47FYvriiy/0gx/8QAkJCWO1VADAGHDO6fDhw5o1a5YSE0fvrRdjHrBwOCy/3z9gzO/3KxqN6ssvv9S0adNOmFNTU6P77rtvrJcGABhHnZ2d+tGPfjRqtzfmARuJyspKhUKh+M/d3d0688wz1dnZqdTU1AlcGQDAq2g0qkAgoOnTp4/q7Y55wDIzMxWJRAaMRSIRpaamDvrsS5J8Pp98Pt8J46mpqQQMAIwa7X8CGvPPgRUWFqqpqWnA2BtvvKHCwsKxvjQA4HvMc8D+85//qK2tTW1tbZL++zb5trY2dXR0SPrvy3+lpaXx82+99Va1t7fr7rvv1u7du/Xoo4/q+eef17Jly0bnHgAAJiXPAXv//fc1f/58zZ8/X5IUCoU0f/58VVVVSZI+//zzeMwk6cc//rG2bdumN954Q/PmzdOGDRv0xBNPqKioaJTuAgBgMvpOnwMbL9FoVGlpaeru7ubfwADAmLF6DOe7EAEAJhEwAIBJBAwAYBIBAwCYRMAAACYRMACASQQMAGASAQMAmETAAAAmETAAgEkEDABgEgEDAJhEwAAAJhEwAIBJBAwAYBIBAwCYRMAAACYRMACASQQMAGASAQMAmETAAAAmETAAgEkEDABgEgEDAJhEwAAAJhEwAIBJBAwAYBIBAwCYRMAAACYRMACASQQMAGASAQMAmETAAAAmETAAgEkEDABgEgEDAJhEwAAAJhEwAIBJBAwAYBIBAwCYRMAAACYRMACASQQMAGASAQMAmETAAAAmETAAgEkEDABgEgEDAJhEwAAAJhEwAIBJBAwAYBIBAwCYRMAAACYRMACASQQMAGASAQMAmETAAAAmETAAgEkEDABgEgEDAJhEwAAAJhEwAIBJBAwAYNKIAlZXV6ecnBylpKSooKBA27dv/9bza2trde6552ratGkKBAJatmyZvvrqqxEtGAAAaQQB27p1q0KhkKqrq7Vjxw7NmzdPRUVFOnDgwKDnP/fcc1qxYoWqq6u1a9cuPfnkk9q6davuueee77x4AMDk5TlgGzdu1M0336zy8nJdcMEF2rx5s0477TQ99dRTg57/3nvvaeHChVqyZIlycnJ01VVX6frrrz/pszYAAL6Np4D19fWptbVVwWDw6xtITFQwGFRLS8ugcy677DK1trbGg9Xe3q6GhgZdffXVQ16nt7dX0Wh0wAEAwP+a4uXkrq4u9ff3y+/3Dxj3+/3avXv3oHOWLFmirq4uXX755XLO6dixY7r11lu/9SXEmpoa3XfffV6WBgCYZMb8XYjNzc1au3atHn30Ue3YsUMvvfSStm3bpjVr1gw5p7KyUt3d3fGjs7NzrJcJADDG0zOw9PR0JSUlKRKJDBiPRCLKzMwcdM7q1au1dOlS3XTTTZKkiy++WD09Pbrlllu0cuVKJSae2FCfzyefz+dlaQCAScbTM7Dk5GTl5eWpqakpPhaLxdTU1KTCwsJB5xw5cuSESCUlJUmSnHNe1wsAgCSPz8AkKRQKqaysTPn5+VqwYIFqa2vV09Oj8vJySVJpaamys7NVU1MjSSouLtbGjRs1f/58FRQUaO/evVq9erWKi4vjIQMAwCvPASspKdHBgwdVVVWlcDis3NxcNTY2xt/Y0dHRMeAZ16pVq5SQkKBVq1bps88+0w9/+EMVFxfrwQcfHL17AQCYdBKcgdfxotGo0tLS1N3drdTU1IleDgDAg7F6DOe7EAEAJhEwAIBJBAwAYBIBAwCYRMAAACYRMACASQQMAGASAQMAmETAAAAmETAAgEkEDABgEgEDAJhEwAAAJhEwAIBJBAwAYBIBAwCYRMAAACYRMACASQQMAGASAQMAmETAAAAmETAAgEkEDABgEgEDAJhEwAAAJhEwAIBJBAwAYBIBAwCYRMAAACYRMACASQQMAGASAQMAmETAAAAmETAAgEkEDABgEgEDAJhEwAAAJhEwAIBJBAwAYBIBAwCYRMAAACYRMACASQQMAGASAQMAmETAAAAmETAAgEkEDABgEgEDAJhEwAAAJhEwAIBJBAwAYBIBAwCYRMAAACYRMACASQQMAGASAQMAmETAAAAmETAAgEkEDABgEgEDAJhEwAAAJhEwAIBJBAwAYNKIAlZXV6ecnBylpKSooKBA27dv/9bzDx06pIqKCmVlZcnn8+mcc85RQ0PDiBYMAIAkTfE6YevWrQqFQtq8ebMKCgpUW1uroqIi7dmzRxkZGSec39fXp1/84hfKyMjQiy++qOzsbH366aeaMWPGaKwfADBJJTjnnJcJBQUFuvTSS7Vp0yZJUiwWUyAQ0B133KEVK1accP7mzZv10EMPaffu3Zo6deqIFhmNRpWWlqbu7m6lpqaO6DYAABNjrB7DPb2E2NfXp9bWVgWDwa9vIDFRwWBQLS0tg8559dVXVVhYqIqKCvn9fl100UVau3at+vv7h7xOb2+votHogAMAgP/lKWBdXV3q7++X3+8fMO73+xUOhwed097erhdffFH9/f1qaGjQ6tWrtWHDBj3wwANDXqempkZpaWnxIxAIeFkmAGASGPN3IcZiMWVkZOjxxx9XXl6eSkpKtHLlSm3evHnIOZWVleru7o4fnZ2dY71MAIAxnt7EkZ6erqSkJEUikQHjkUhEmZmZg87JysrS1KlTlZSUFB87//zzFQ6H1dfXp+Tk5BPm+Hw++Xw+L0sDAEwynp6BJScnKy8vT01NTfGxWCympqYmFRYWDjpn4cKF2rt3r2KxWHzs448/VlZW1qDxAgBgODy/hBgKhbRlyxY988wz2rVrl2677Tb19PSovLxcklRaWqrKysr4+bfddpu++OIL3Xnnnfr444+1bds2rV27VhUVFaN3LwAAk47nz4GVlJTo4MGDqqqqUjgcVm5urhobG+Nv7Ojo6FBi4tddDAQCev3117Vs2TLNnTtX2dnZuvPOO7V8+fLRuxcAgEnH8+fAJgKfAwMAu06Jz4EBAHCqIGAAAJMIGADAJAIGADCJgAEATCJgAACTCBgAwCQCBgAwiYABAEwiYAAAkwgYAMAkAgYAMImAAQBMImAAAJMIGADAJAIGADCJgAEATCJgAACTCBgAwCQCBgAwiYABAEwiYAAAkwgYAMAkAgYAMImAAQBMImAAAJMIGADAJAIGADCJgAEATCJgAACTCBgAwCQCBgAwiYABAEwiYAAAkwgYAMAkAgYAMImAAQBMImAAAJMIGADAJAIGADCJgAEATCJgAACTCBgAwCQCBgAwiYABAEwiYAAAkwgYAMAkAgYAMImAAQBMImAAAJMIGADAJAIGADCJgAEATCJgAACTCBgAwCQCBgAwiYABAEwiYAAAkwgYAMAkAgYAMImAAQBMImAAAJMIGADApBEFrK6uTjk5OUpJSVFBQYG2b98+rHn19fVKSEjQ4sWLR3JZAADiPAds69atCoVCqq6u1o4dOzRv3jwVFRXpwIED3zpv//79+v3vf69FixaNeLEAABznOWAbN27UzTffrPLycl1wwQXavHmzTjvtND311FNDzunv79cNN9yg++67T7Nnzz7pNXp7exWNRgccAAD8L08B6+vrU2trq4LB4Nc3kJioYDColpaWIefdf//9ysjI0I033jis69TU1CgtLS1+BAIBL8sEAEwCngLW1dWl/v5++f3+AeN+v1/hcHjQOe+8846efPJJbdmyZdjXqaysVHd3d/zo7Oz0skwAwCQwZSxv/PDhw1q6dKm2bNmi9PT0Yc/z+Xzy+XxjuDIAgHWeApaenq6kpCRFIpEB45FIRJmZmSec/8knn2j//v0qLi6Oj8Visf9eeMoU7dmzR3PmzBnJugEAk5ynlxCTk5OVl5enpqam+FgsFlNTU5MKCwtPOP+8887TBx98oLa2tvhx7bXX6sorr1RbWxv/tgUAGDHPLyGGQiGVlZUpPz9fCxYsUG1trXp6elReXi5JKi0tVXZ2tmpqapSSkqKLLrpowPwZM2ZI0gnjAAB44TlgJSUlOnjwoKqqqhQOh5Wbm6vGxsb4Gzs6OjqUmMgXfAAAxlaCc85N9CJOJhqNKi0tTd3d3UpNTZ3o5QAAPBirx3CeKgEATCJgAACTCBgAwCQCBgAwiYABAEwiYAAAkwgYAMAkAgYAMImAAQBMImAAAJMIGADAJAIGADCJgAEATCJgAACTCBgAwCQCBgAwiYABAEwiYAAAkwgYAMAkAgYAMImAAQBMImAAAJMIGADAJAIGADCJgAEATCJgAACTCBgAwCQCBgAwiYABAEwiYAAAkwgYAMAkAgYAMImAAQBMImAAAJMIGADAJAIGADCJgAEATCJgAACTCBgAwCQCBgAwiYABAEwiYAAAkwgYAMAkAgYAMImAAQBMImAAAJMIGADAJAIGADCJgAEATCJgAACTCBgAwCQCBgAwiYABAEwiYAAAkwgYAMAkAgYAMImAAQBMImAAAJMIGADAJAIGADCJgAEATCJgAACTCBgAwKQRBayurk45OTlKSUlRQUGBtm/fPuS5W7Zs0aJFizRz5kzNnDlTwWDwW88HAGA4PAds69atCoVCqq6u1o4dOzRv3jwVFRXpwIEDg57f3Nys66+/Xm+99ZZaWloUCAR01VVX6bPPPvvOiwcATF4JzjnnZUJBQYEuvfRSbdq0SZIUi8UUCAR0xx13aMWKFSed39/fr5kzZ2rTpk0qLS0d9Jze3l719vbGf45GowoEAuru7lZqaqqX5QIAJlg0GlVaWtqoP4Z7egbW19en1tZWBYPBr28gMVHBYFAtLS3Duo0jR47o6NGjOuOMM4Y8p6amRmlpafEjEAh4WSYAYBLwFLCuri719/fL7/cPGPf7/QqHw8O6jeXLl2vWrFkDIvhNlZWV6u7ujh+dnZ1elgkAmASmjOfF1q1bp/r6ejU3NyslJWXI83w+n3w+3ziuDABgjaeApaenKykpSZFIZMB4JBJRZmbmt859+OGHtW7dOr355puaO3eu95UCAPA/PL2EmJycrLy8PDU1NcXHYrGYmpqaVFhYOOS89evXa82aNWpsbFR+fv7IVwsAwP/z/BJiKBRSWVmZ8vPztWDBAtXW1qqnp0fl5eWSpNLSUmVnZ6umpkaS9Mc//lFVVVV67rnnlJOTE/+3stNPP12nn376KN4VAMBk4jlgJSUlOnjwoKqqqhQOh5Wbm6vGxsb4Gzs6OjqUmPj1E7vHHntMfX19+tWvfjXgdqqrq3Xvvfd+t9UDACYtz58Dmwhj9RkCAMDYOyU+BwYAwKmCgAEATCJgAACTCBgAwCQCBgAwiYABAEwiYAAAkwgYAMAkAgYAMImAAQBMImAAAJMIGADAJAIGADCJgAEATCJgAACTCBgAwCQCBgAwiYABAEwiYAAAkwgYAMAkAgYAMImAAQBMImAAAJMIGADAJAIGADCJgAEATCJgAACTCBgAwCQCBgAwiYABAEwiYAAAkwgYAMAkAgYAMImAAQBMImAAAJMIGADAJAIGADCJgAEATCJgAACTCBgAwCQCBgAwiYABAEwiYAAAkwgYAMAkAgYAMImAAQBMImAAAJMIGADAJAIGADCJgAEATCJgAACTCBgAwCQCBgAwiYABAEwiYAAAkwgYAMAkAgYAMImAAQBMImAAAJMIGADAJAIGADCJgAEATCJgAACTRhSwuro65eTkKCUlRQUFBdq+ffu3nv/CCy/ovPPOU0pKii6++GI1NDSMaLEAABznOWBbt25VKBRSdXW1duzYoXnz5qmoqEgHDhwY9Pz33ntP119/vW688Ubt3LlTixcv1uLFi/Xhhx9+58UDACavBOec8zKhoKBAl156qTZt2iRJisViCgQCuuOOO7RixYoTzi8pKVFPT49ee+21+NhPf/pT5ebmavPmzYNeo7e3V729vfGfu7u7deaZZ6qzs1OpqalelgsAmGDRaFSBQECHDh1SWlra6N2w86C3t9clJSW5l19+ecB4aWmpu/baawedEwgE3J/+9KcBY1VVVW7u3LlDXqe6utpJ4uDg4OD4Hh2ffPKJl+Sc1BR50NXVpf7+fvn9/gHjfr9fu3fvHnROOBwe9PxwODzkdSorKxUKheI/Hzp0SGeddZY6OjpGt97fM8f/K4dnqt+OfTo59mh42KfhOf4q2hlnnDGqt+spYOPF5/PJ5/OdMJ6WlsYvyTCkpqayT8PAPp0cezQ87NPwJCaO7hvfPd1aenq6kpKSFIlEBoxHIhFlZmYOOiczM9PT+QAADIengCUnJysvL09NTU3xsVgspqamJhUWFg46p7CwcMD5kvTGG28MeT4AAMPh+SXEUCiksrIy5efna8GCBaqtrVVPT4/Ky8slSaWlpcrOzlZNTY0k6c4779QVV1yhDRs26JprrlF9fb3ef/99Pf7448O+ps/nU3V19aAvK+Jr7NPwsE8nxx4ND/s0PGO1T57fRi9JmzZt0kMPPaRwOKzc3Fz9+c9/VkFBgSTpZz/7mXJycvT000/Hz3/hhRe0atUq7d+/Xz/5yU+0fv16XX311aN2JwAAk8+IAgYAwETjuxABACYRMACASQQMAGASAQMAmHTKBIw/0TI8XvZpy5YtWrRokWbOnKmZM2cqGAyedF+/D7z+Lh1XX1+vhIQELV68eGwXeIrwuk+HDh1SRUWFsrKy5PP5dM4550yK/9953afa2lqde+65mjZtmgKBgJYtW6avvvpqnFY7Md5++20VFxdr1qxZSkhI0CuvvHLSOc3Nzbrkkkvk8/l09tlnD3jn+rCN6jcrjlB9fb1LTk52Tz31lPvnP//pbr75ZjdjxgwXiUQGPf/dd991SUlJbv369e6jjz5yq1atclOnTnUffPDBOK98fHndpyVLlri6ujq3c+dOt2vXLveb3/zGpaWluX/961/jvPLx43WPjtu3b5/Lzs52ixYtcr/85S/HZ7ETyOs+9fb2uvz8fHf11Ve7d955x+3bt881Nze7tra2cV75+PK6T88++6zz+Xzu2Wefdfv27XOvv/66y8rKcsuWLRvnlY+vhoYGt3LlSvfSSy85SSd84fs3tbe3u9NOO82FQiH30UcfuUceecQlJSW5xsZGT9c9JQK2YMECV1FREf+5v7/fzZo1y9XU1Ax6/nXXXeeuueaaAWMFBQXut7/97Ziuc6J53advOnbsmJs+fbp75plnxmqJE24ke3Ts2DF32WWXuSeeeMKVlZVNioB53afHHnvMzZ492/X19Y3XEk8JXvepoqLC/fznPx8wFgqF3MKFC8d0naeS4QTs7rvvdhdeeOGAsZKSEldUVOTpWhP+EmJfX59aW1sVDAbjY4mJiQoGg2ppaRl0TktLy4DzJamoqGjI878PRrJP33TkyBEdPXp01L8R+lQx0j26//77lZGRoRtvvHE8ljnhRrJPr776qgoLC1VRUSG/36+LLrpIa9euVX9//3gte9yNZJ8uu+wytba2xl9mbG9vV0NDA1/c8A2j9Rg+4d9GP15/osW6kezTNy1fvlyzZs064Rfn+2Ike/TOO+/oySefVFtb2zis8NQwkn1qb2/X3//+d91www1qaGjQ3r17dfvtt+vo0aOqrq4ej2WPu5Hs05IlS9TV1aXLL79czjkdO3ZMt956q+65557xWLIZQz2GR6NRffnll5o2bdqwbmfCn4FhfKxbt0719fV6+eWXlZKSMtHLOSUcPnxYS5cu1ZYtW5Senj7RyzmlxWIxZWRk6PHHH1deXp5KSkq0cuXKIf+q+mTV3NystWvX6tFHH9WOHTv00ksvadu2bVqzZs1EL+17acKfgfEnWoZnJPt03MMPP6x169bpzTff1Ny5c8dymRPK6x598skn2r9/v4qLi+NjsVhMkjRlyhTt2bNHc+bMGdtFT4CR/C5lZWVp6tSpSkpKio+df/75CofD6uvrU3Jy8piueSKMZJ9Wr16tpUuX6qabbpIkXXzxxerp6dEtt9yilStXjvrfw7JqqMfw1NTUYT/7kk6BZ2D8iZbhGck+SdL69eu1Zs0aNTY2Kj8/fzyWOmG87tF5552nDz74QG1tbfHj2muv1ZVXXqm2tjYFAoHxXP64Gcnv0sKFC7V379544CXp448/VlZW1vcyXtLI9unIkSMnROp49B1fOxs3ao/h3t5fMjbq6+udz+dzTz/9tPvoo4/cLbfc4mbMmOHC4bBzzrmlS5e6FStWxM9/99133ZQpU9zDDz/sdu3a5aqrqyfN2+i97NO6detccnKye/HFF93nn38ePw4fPjxRd2HMed2jb5os70L0uk8dHR1u+vTp7ne/+53bs2ePe+2111xGRoZ74IEHJuoujAuv+1RdXe2mT5/u/vrXv7r29nb3t7/9zc2ZM8ddd911E3UXxsXhw4fdzp073c6dO50kt3HjRrdz50736aefOuecW7FihVu6dGn8/ONvo//DH/7gdu3a5erq6uy+jd455x555BF35plnuuTkZLdgwQL3j3/8I/6/XXHFFa6srGzA+c8//7w755xzXHJysrvwwgvdtm3bxnnFE8PLPp111llO0glHdXX1+C98HHn9XfpfkyVgznnfp/fee88VFBQ4n8/nZs+e7R588EF37NixcV71+POyT0ePHnX33nuvmzNnjktJSXGBQMDdfvvt7t///vf4L3wcvfXWW4M+1hzfm7KyMnfFFVecMCc3N9clJye72bNnu7/85S+er8ufUwEAmDTh/wYGAMBIEDAAgEkEDABgEgEDAJhEwAAAJhEwAIBJBAwAYBIBAwCYRMAAACYRMACASQQMAGDS/wFzTP77mPX4nAAAAABJRU5ErkJggg==\n"
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import gym\n",
    "from matplotlib import pyplot as plt\n",
    "%matplotlib inline\n",
    "\n",
    "#创建环境\n",
    "env = gym.make('CartPole-v0')\n",
    "env.reset()\n",
    "\n",
    "\n",
    "#打印游戏\n",
    "def show():\n",
    "    plt.imshow(env.render())\n",
    "    plt.show()\n",
    "\n",
    "\n",
    "show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "这个游戏的状态用4个数字表示,我也不知道这4个数字分别是什么意思,反正这4个数字就能描述游戏全部的状态\n",
      "state= (array([0.01492591, 0.04797895, 0.02529124, 0.03190754], dtype=float32), {})\n",
      "这个游戏一共有2个动作,不是0就是1\n",
      "env.action_space= Discrete(2)\n",
      "随机一个动作\n",
      "action= 1\n",
      "执行一个动作,得到下一个状态,奖励,是否结束\n"
     ]
    },
    {
     "ename": "ValueError",
     "evalue": "too many values to unpack (expected 4)",
     "output_type": "error",
     "traceback": [
      "\u001B[1;31m---------------------------------------------------------------------------\u001B[0m",
      "\u001B[1;31mValueError\u001B[0m                                Traceback (most recent call last)",
      "Input \u001B[1;32mIn [3]\u001B[0m, in \u001B[0;36m<cell line: 30>\u001B[1;34m()\u001B[0m\n\u001B[0;32m     26\u001B[0m     \u001B[38;5;28mprint\u001B[39m(\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mover=\u001B[39m\u001B[38;5;124m'\u001B[39m, over)\n\u001B[0;32m     27\u001B[0m     \u001B[38;5;66;03m#over= False\u001B[39;00m\n\u001B[1;32m---> 30\u001B[0m \u001B[43mtest_env\u001B[49m\u001B[43m(\u001B[49m\u001B[43m)\u001B[49m\n",
      "Input \u001B[1;32mIn [3]\u001B[0m, in \u001B[0;36mtest_env\u001B[1;34m()\u001B[0m\n\u001B[0;32m     15\u001B[0m \u001B[38;5;66;03m#action= 1\u001B[39;00m\n\u001B[0;32m     17\u001B[0m \u001B[38;5;28mprint\u001B[39m(\u001B[38;5;124m'\u001B[39m\u001B[38;5;124m执行一个动作,得到下一个状态,奖励,是否结束\u001B[39m\u001B[38;5;124m'\u001B[39m)\n\u001B[1;32m---> 18\u001B[0m state, reward, over, _ \u001B[38;5;241m=\u001B[39m env\u001B[38;5;241m.\u001B[39mstep(action)\n\u001B[0;32m     20\u001B[0m \u001B[38;5;28mprint\u001B[39m(\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mstate=\u001B[39m\u001B[38;5;124m'\u001B[39m, state)\n\u001B[0;32m     21\u001B[0m \u001B[38;5;66;03m#state= [ 0.02018229 -0.16441101  0.01547085  0.2661691 ]\u001B[39;00m\n",
      "\u001B[1;31mValueError\u001B[0m: too many values to unpack (expected 4)"
     ]
    }
   ],
   "source": [
    "#测试游戏环境\n",
    "def test_env():\n",
    "    state = env.reset()\n",
    "    print('这个游戏的状态用4个数字表示,我也不知道这4个数字分别是什么意思,反正这4个数字就能描述游戏全部的状态')\n",
    "    print('state=', state)\n",
    "    #state= [ 0.03490619  0.04873464  0.04908862 -0.00375859]\n",
    "\n",
    "    print('这个游戏一共有2个动作,不是0就是1')\n",
    "    print('env.action_space=', env.action_space)\n",
    "    #env.action_space= Discrete(2)\n",
    "\n",
    "    print('随机一个动作')\n",
    "    action = env.action_space.sample()\n",
    "    print('action=', action)\n",
    "    #action= 1\n",
    "\n",
    "    print('执行一个动作,得到下一个状态,奖励,是否结束')\n",
    "    state, reward, over, _ = env.step(action)\n",
    "\n",
    "    print('state=', state)\n",
    "    #state= [ 0.02018229 -0.16441101  0.01547085  0.2661691 ]\n",
    "\n",
    "    print('reward=', reward)\n",
    "    #reward= 1.0\n",
    "\n",
    "    print('over=', over)\n",
    "    #over= False\n",
    "\n",
    "\n",
    "test_env()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Sequential(\n",
       "  (0): Linear(in_features=4, out_features=128, bias=True)\n",
       "  (1): ReLU()\n",
       "  (2): Linear(in_features=128, out_features=2, bias=True)\n",
       ")"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import torch\n",
    "\n",
    "#计算动作的模型,也是真正要用的模型\n",
    "model = torch.nn.Sequential(\n",
    "    torch.nn.Linear(4, 128),\n",
    "    torch.nn.ReLU(),\n",
    "    torch.nn.Linear(128, 2),\n",
    ")\n",
    "\n",
    "model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import random\n",
    "\n",
    "\n",
    "#得到一个动作\n",
    "def get_action(state):\n",
    "    if random.random() < 0.01:\n",
    "        return random.choice([0, 1])\n",
    "\n",
    "    #走神经网络,得到一个动作\n",
    "    state = torch.FloatTensor(state).reshape(1, 4)\n",
    "\n",
    "    return model(state).argmax().item()\n",
    "\n",
    "\n",
    "get_action([0.0013847, -0.01194451, 0.04260966, 0.00688801])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((208, 0), 208)"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#样本池\n",
    "datas = []\n",
    "\n",
    "\n",
    "#向样本池中添加N条数据,删除M条最古老的数据\n",
    "def update_data():\n",
    "    old_count = len(datas)\n",
    "\n",
    "    #玩到新增了N个数据为止\n",
    "    while len(datas) - old_count < 200:\n",
    "        #初始化游戏\n",
    "        state = env.reset()\n",
    "\n",
    "        #玩到游戏结束为止\n",
    "        over = False\n",
    "        while not over:\n",
    "            #根据当前状态得到一个动作\n",
    "            action = get_action(state)\n",
    "\n",
    "            #执行动作,得到反馈\n",
    "            next_state, reward, over, _ = env.step(action)\n",
    "\n",
    "            #记录数据样本\n",
    "            datas.append((state, action, reward, next_state, over))\n",
    "\n",
    "            #更新游戏状态,开始下一个动作\n",
    "            state = next_state\n",
    "\n",
    "    update_count = len(datas) - old_count\n",
    "    drop_count = max(len(datas) - 10000, 0)\n",
    "\n",
    "    #数据上限,超出时从最古老的开始删除\n",
    "    while len(datas) > 10000:\n",
    "        datas.pop(0)\n",
    "\n",
    "    return update_count, drop_count\n",
    "\n",
    "\n",
    "update_data(), len(datas)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/root/anaconda3/envs/cpu/lib/python3.6/site-packages/ipykernel_launcher.py:7: UserWarning: Creating a tensor from a list of numpy.ndarrays is extremely slow. Please consider converting the list to a single numpy.ndarray with numpy.array() before converting to a tensor. (Triggered internally at  /opt/conda/conda-bld/pytorch_1640811701593/work/torch/csrc/utils/tensor_new.cpp:201.)\n",
      "  import sys\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "(tensor([[-0.1403, -1.5900,  0.1024,  2.2623],\n",
       "         [-0.0193,  0.0074,  0.0325,  0.0051],\n",
       "         [-0.0843, -1.4061,  0.1112,  2.0697],\n",
       "         [ 0.0169, -0.5740, -0.0193,  0.8088],\n",
       "         [-0.0628, -0.9541,  0.0700,  1.5112]]),\n",
       " tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,\n",
       "         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
       "         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),\n",
       " tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n",
       "         1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n",
       "         1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n",
       "         1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]),\n",
       " tensor([[-0.1721, -1.7859,  0.1477,  2.5847],\n",
       "         [-0.0192, -0.1882,  0.0326,  0.3079],\n",
       "         [-0.1124, -1.6021,  0.1526,  2.3946],\n",
       "         [ 0.0054, -0.7689, -0.0031,  1.0953],\n",
       "         [-0.0819, -1.1500,  0.1002,  1.8249]]),\n",
       " tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,\n",
       "         0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1,\n",
       "         0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0]))"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#获取一批数据样本\n",
    "def get_sample():\n",
    "    #从样本池中采样\n",
    "    samples = random.sample(datas, 64)\n",
    "\n",
    "    #[b, 4]\n",
    "    state = torch.FloatTensor([i[0] for i in samples])\n",
    "    #[b]\n",
    "    action = torch.LongTensor([i[1] for i in samples])\n",
    "    #[b]\n",
    "    reward = torch.FloatTensor([i[2] for i in samples])\n",
    "    #[b, 4]\n",
    "    next_state = torch.FloatTensor([i[3] for i in samples])\n",
    "    #[b]\n",
    "    over = torch.LongTensor([i[4] for i in samples])\n",
    "\n",
    "    return state, action, reward, next_state, over\n",
    "\n",
    "\n",
    "state, action, reward, next_state, over = get_sample()\n",
    "\n",
    "state[:5], action, reward, next_state[:5], over"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([-0.1866, -0.0030, -0.1721, -0.0867, -0.1318, -0.1368, -0.1860, -0.0521,\n",
       "        -0.0928, -0.2029, -0.1845, -0.1608, -0.0451, -0.1302, -0.1117, -0.1741,\n",
       "        -0.1621, -0.0115, -0.2227, -0.0518, -0.1125, -0.1986, -0.1635, -0.1590,\n",
       "        -0.1749, -0.1699, -0.1413, -0.1478, -0.0684, -0.2062, -0.1351, -0.2119,\n",
       "        -0.1064, -0.0067, -0.0450, -0.1806, -0.0099, -0.1108, -0.0029, -0.1937,\n",
       "        -0.2198, -0.2203,  0.0058, -0.0088, -0.1348, -0.0481, -0.1831, -0.1888,\n",
       "        -0.1329, -0.0877, -0.0828, -0.1478, -0.1987, -0.0943, -0.2009, -0.0639,\n",
       "        -0.0113, -0.1290, -0.1587, -0.0151, -0.0658, -0.1523, -0.2234, -0.1966],\n",
       "       grad_fn=<IndexBackward0>)"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def get_value(state, action):\n",
    "    #使用状态计算出动作的logits\n",
    "    #[b, 4] -> [b, 2]\n",
    "    value = model(state)\n",
    "\n",
    "    #根据实际使用的action取出每一个值\n",
    "    #这个值就是模型评估的在该状态下,执行动作的分数\n",
    "    #在执行动作前,显然并不知道会得到的反馈和next_state\n",
    "    #所以这里不能也不需要考虑next_state和reward\n",
    "    #[b, 2] -> [b]\n",
    "    value = value[range(64), action]\n",
    "\n",
    "    return value\n",
    "\n",
    "\n",
    "get_value(state, action)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([0.7923, 0.9600, 0.8054, 0.8917, 0.8459, 0.8388, 0.7920, 0.9290, 0.8881,\n",
       "        0.7770, 0.7942, 0.8171, 0.9330, 0.8552, 0.8679, 0.8031, 1.0057, 0.9492,\n",
       "        1.0000, 0.9301, 0.8697, 0.7811, 0.8167, 0.8206, 0.8027, 0.8076, 0.8424,\n",
       "        0.8327, 0.9123, 1.0000, 0.8444, 0.7698, 0.8759, 0.9516, 0.9334, 0.7980,\n",
       "        0.9535, 0.8728, 0.9599, 0.7846, 1.0000, 1.0000, 0.9595, 0.9566, 0.8398,\n",
       "        0.9288, 0.7957, 1.0000, 0.8508, 0.8891, 0.8957, 0.8312, 0.7817, 0.8856,\n",
       "        1.0000, 0.9188, 0.9569, 0.8520, 0.8210, 0.9534, 0.9158, 0.8269, 1.0000,\n",
       "        0.7832])"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def get_target(reward, next_state, over):\n",
    "    #上面已经把模型认为的状态下执行动作的分数给评估出来了\n",
    "    #下面使用next_state和reward计算真实的分数\n",
    "    #针对一个状态,它到底应该多少分,可以使用以往模型积累的经验评估\n",
    "    #这也是没办法的办法,因为显然没有精确解,这里使用延迟更新的next_model评估\n",
    "\n",
    "    #使用next_state计算下一个状态的分数\n",
    "    #[b, 4] -> [b, 2]\n",
    "    with torch.no_grad():\n",
    "        target = model(next_state)\n",
    "\n",
    "    #取所有动作中分数最大的\n",
    "    #[b, 2] -> [b]\n",
    "    target = target.max(dim=1)[0]\n",
    "\n",
    "    #如果next_state已经游戏结束,则next_state的分数是0\n",
    "    #因为如果下一步已经游戏结束,显然不需要再继续玩下去,也就不需要考虑next_state了.\n",
    "    #[b]\n",
    "    for i in range(64):\n",
    "        if over[i]:\n",
    "            target[i] = 0\n",
    "\n",
    "    #下一个状态的分数乘以一个系数,相当于权重\n",
    "    #[b] * [b] -> [b]\n",
    "    target *= 0.98\n",
    "\n",
    "    #加上reward就是最终的分数\n",
    "    #[b] + [b] -> [b]\n",
    "    target += reward\n",
    "\n",
    "    return target\n",
    "\n",
    "\n",
    "get_target(reward, next_state, over)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "10.0"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from IPython import display\n",
    "\n",
    "\n",
    "def test(play):\n",
    "    #初始化游戏\n",
    "    state = env.reset()\n",
    "\n",
    "    #记录反馈值的和,这个值越大越好\n",
    "    reward_sum = 0\n",
    "\n",
    "    #玩到游戏结束为止\n",
    "    over = False\n",
    "    while not over:\n",
    "        #根据当前状态得到一个动作\n",
    "        action = get_action(state)\n",
    "\n",
    "        #执行动作,得到反馈\n",
    "        state, reward, over, _ = env.step(action)\n",
    "        reward_sum += reward\n",
    "\n",
    "        #打印动画\n",
    "        if play:\n",
    "            display.clear_output(wait=True)\n",
    "            show()\n",
    "\n",
    "    return reward_sum\n",
    "\n",
    "\n",
    "test(play=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "id": "OHoSU6uI-xIt",
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0 415 207 0 12.35\n",
      "50 10000 241 241 113.55\n",
      "100 10000 200 200 164.7\n",
      "150 10000 200 200 182.95\n",
      "200 10000 340 340 193.9\n",
      "250 10000 200 200 165.05\n",
      "300 10000 282 282 170.7\n",
      "350 10000 200 200 178.4\n",
      "400 10000 200 200 186.65\n",
      "450 10000 200 200 198.35\n"
     ]
    }
   ],
   "source": [
    "def train():\n",
    "    model.train()\n",
    "    optimizer = torch.optim.Adam(model.parameters(), lr=2e-3)\n",
    "    loss_fn = torch.nn.MSELoss()\n",
    "\n",
    "    #训练N次\n",
    "    for epoch in range(500):\n",
    "        #更新N条数据\n",
    "        update_count, drop_count = update_data()\n",
    "\n",
    "        #每次更新过数据后,学习N次\n",
    "        for i in range(200):\n",
    "            #采样一批数据\n",
    "            state, action, reward, next_state, over = get_sample()\n",
    "\n",
    "            #计算一批样本的value和target\n",
    "            value = get_value(state, action)\n",
    "            target = get_target(reward, next_state, over)\n",
    "\n",
    "            #更新参数\n",
    "            loss = loss_fn(value, target)\n",
    "            optimizer.zero_grad()\n",
    "            loss.backward()\n",
    "            optimizer.step()\n",
    "\n",
    "        if epoch % 50 == 0:\n",
    "            test_result = sum([test(play=False) for _ in range(20)]) / 20\n",
    "            print(epoch, len(datas), update_count, drop_count, test_result)\n",
    "\n",
    "\n",
    "train()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAW4AAAD8CAYAAABXe05zAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAATyklEQVR4nO3df6zdd33f8efLjmOnidXY5CYY/1hcarQmdHWqKw8pbMuANW421SCVyUhF/iOSkTAaaBVd0mor/GGpmwrsn4EURlSPUYwRRLEytjW4oAy1i3HSJNhx3Fywm9zYsW/CIAmhjn+898f9WjnY9/oen3uOj7/3Ph/S0fme9/fzPef9sZKXv/7c7/eeVBWSpPZYMOwGJEmXxuCWpJYxuCWpZQxuSWoZg1uSWsbglqSWGVhwJ9mY5FCSsST3DOpzJGm+ySCu406yEPhb4F8A48D3gQ9V1dN9/zBJmmcGdca9ARirqh9V1RvATmDTgD5LkuaVqwb0viuB5ztejwP/eLrBN9xwQ918880DakWS2ufIkSO89NJLmWrfoIJ7qg/7hTWZJFuBrQBr1qxh3759A2pFktpndHR02n2DWioZB1Z3vF4FHO0cUFX3VdVoVY2OjIwMqA1JmnsGFdzfB9YlWZvkamAzsHtAnyVJ88pAlkqq6nSSjwH/G1gI3F9VBwbxWZI03wxqjZuq+hbwrUG9vyTNV945KUktY3BLUssY3JLUMga3JLWMwS1JLWNwS1LLGNyS1DIGtyS1jMEtSS1jcEtSyxjcktQyBrcktYzBLUktY3BLUssY3JLUMga3JLWMwS1JLWNwS1LLzOqry5IcAV4FzgCnq2o0yXLga8DNwBHgX1fV/5tdm5Kkc/pxxv3Pq2p9VY02r+8B9lTVOmBP81qS1CeDWCrZBOxotncA7x/AZ0jSvDXb4C7gL5I8lmRrU7upqo4BNM83zvIzJEkdZrXGDdxeVUeT3Ag8nOSZbg9sgn4rwJo1a2bZhiTNH7M6466qo83zCeABYANwPMkKgOb5xDTH3ldVo1U1OjIyMps2JGle6Tm4k1ybZOm5beC3gP3AbmBLM2wL8OBsm5QkvWk2SyU3AQ8kOfc+f15V/yvJ94FdSe4GngM+OPs2JUnn9BzcVfUj4DemqL8MvHc2TUmSpuedk5LUMga3JLWMwS1JLWNwS1LLGNyS1DIGtyS1jMEtSS1jcEtSyxjcktQyBrcktYzBLUktY3BLUssY3JLUMga3JLWMwS1JLWNwS1LLGNyS1DIGtyS1jMEtSS0zY3AnuT/JiST7O2rLkzyc5NnmeVnHvnuTjCU5lOTOQTUuSfNVN2fcfwZsPK92D7CnqtYBe5rXJLkF2Azc2hzz+SQL+9atJGnm4K6qR4Afn1feBOxotncA7++o76yqk1V1GBgDNvSnVUkS9L7GfVNVHQNonm9s6iuB5zvGjTe1CyTZmmRfkn0TExM9tiFJ80+/fziZKWo11cCquq+qRqtqdGRkpM9tSNLc1WtwH0+yAqB5PtHUx4HVHeNWAUd7b0+SdL5eg3s3sKXZ3gI82FHfnGRxkrXAOmDv7FqUJHW6aqYBSb4K3AHckGQc+GPgT4BdSe4GngM+CFBVB5LsAp4GTgPbqurMgHqXpHlpxuCuqg9Ns+u904zfDmyfTVOSpOl556QktYzBLUktY3BLUssY3JLUMga3JLWMwS1JLWNwS1LLGNyS1DIGtyS1jMEtSS1jcEtSyxjcktQyBrcktYzBLUktY3BLUssY3JLUMga3JLWMwS1JLTNjcCe5P8mJJPs7ap9K8kKSJ5rHXR377k0yluRQkjsH1bgkzVfdnHH/GbBxivrnqmp98/gWQJJbgM3Arc0xn0+ysF/NSpK6CO6qegT4cZfvtwnYWVUnq+owMAZsmEV/kqTzzGaN+2NJnmqWUpY1tZXA8x1jxpvaBZJsTbIvyb6JiYlZtCFJ80uvwf0F4O3AeuAY8JmmninG1lRvUFX3VdVoVY2OjIz02IYkzT89BXdVHa+qM1V1Fvgiby6HjAOrO4auAo7OrkVJUqeegjvJio6XHwDOXXGyG9icZHGStcA6YO/sWpQkdbpqpgFJvgrcAdyQZBz4Y+COJOuZXAY5AnwEoKoOJNkFPA2cBrZV1ZmBdC5J89SMwV1VH5qi/KWLjN8ObJ9NU5Kk6XnnpCS1jMEtSS1jcEtSyxjcktQyBrcktcyMV5Voaj+b+DvOvPHzC+q/dMMarlr8S0PoSNJ8YXD3oKp47q928tqLP7xg3z/8nT9g6YpfHUJXkuYLl0p6UUXVlL+CRZIGzuDuQdVZMLglDYnB3Ysqg1vS0BjcPag6S03922olaeAM7l54xi1piAzuHhQGt6ThMbh74VKJpCEyuHtQLpVIGiKDuxcGt6QhMrh7UWe9AUfS0BjcPZgMbYNb0nDMGNxJVif5TpKDSQ4k+XhTX57k4STPNs/LOo65N8lYkkNJ7hzkBIahvOVd0hB1c8Z9Gvj9qvo14F3AtiS3APcAe6pqHbCneU2zbzNwK7AR+HyShYNofmi85V3SEM0Y3FV1rKoeb7ZfBQ4CK4FNwI5m2A7g/c32JmBnVZ2sqsPAGLChz30Pl0slkobokta4k9wM3AY8CtxUVcdgMtyBG5thK4HnOw4bb2rnv9fWJPuS7JuYmOih9eFxqUTSMHUd3EmuA74BfKKqXrnY0ClqF6RcVd1XVaNVNToyMtJtG1eGOjv5kKQh6Cq4kyxiMrS/UlXfbMrHk6xo9q8ATjT1cWB1x+GrgKP9affKcPLVlzn1+k8vqF+99C0suvaXh9CRpPmkm6tKAnwJOFhVn+3YtRvY0mxvAR7sqG9OsjjJWmAdsLd/LQ9fnTlFnT1zQX3BVVezYKFfKiRpsLpJmduBDwM/SPJEU/tD4E+AXUnuBp4DPghQVQeS7AKeZvKKlG1VdWHKzUFhAVOvFElS/8wY3FX1PaZPo/dOc8x2YPss+mqnhMl/oEjS4HjnZB8lAYNb0oAZ3P0Ul0okDZ7B3UdJSPwjlTRYpkw/JZ5wSxo4g7uPkgXNcokkDY4p008J8ZRb0oAZ3P3kVSWSLgODu48ml0oMbkmDZXD3k0slki4Dg7uPPOOWdDkY3P3kLe+SLgODu48ml0kMbkmDZXBfoot+841LJZIuA4O7BzXN901OXg1ocEsaLIO7B3XWry2TNDwGdw/myfdCSLpCGdw98Ixb0jAZ3L0wuCUNUTdfFrw6yXeSHExyIMnHm/qnkryQ5InmcVfHMfcmGUtyKMmdg5zAMLhUImmYuvmy4NPA71fV40mWAo8lebjZ97mq+tPOwUluATYDtwJvA76d5B1z6QuDXSqRNEwznnFX1bGqerzZfhU4CKy8yCGbgJ1VdbKqDgNjwIZ+NHulqLNz5u8gSS10SWvcSW4GbgMebUofS/JUkvuTLGtqK4HnOw4b5+JB3zpVnnFLGp6ugzvJdcA3gE9U1SvAF4C3A+uBY8Bnzg2d4vAL7lhJsjXJviT7JiYmLrXv4fKMW9IQdRXcSRYxGdpfqapvAlTV8ao6U5Onn1/kzeWQcWB1x+GrgKPnv2dV3VdVo1U1OjIyMps5XHaecUsapm6uKgnwJeBgVX22o76iY9gHgP3N9m5gc5LFSdYC64C9/Wt5+PzhpKRh6uaqktuBDwM/SPJEU/tD4ENJ1jO5DHIE+AhAVR1Isgt4mskrUrbNpStKwB9OShquGYO7qr7H1OvW37rIMduB7bPo64rmUomkYfLOyR64VCJpmAzuXsytlR9JLWNw9+Ds6TemrGdBNz8ykKTZMbgvWfHqsWen3HPdinWXuRdJ85HB3YPp1rgXLPSMW9LgGdx9lCwcdguS5gGDu4+ywD9OSYNn0vRT/OOUNHgmTd+ELHCpRNLgGdx95FKJpMvBpOkjfzgp6XIwuPvIM25Jl4MXHjf279/PK6+8MuO4AAt//voFv3WrKA4+c4h64bUZ32PhwoXcdtttXH311b01K2leM7gb27Zt45FHHplxXAJf/fe/y6+8bdkv1Ots8W8+/gn2HbrgOyMucO211zI2NsZb3/rWnvuVNH8Z3D362ZmlHP37d3C6ruLGxX/H9Qtf5Iy/NVDSZWBw9+C1M8t4/qe/zetnfxmA5//+17jl2v/D6TMXfLWmJPWdP027ZOEHr/0zXj97PZMr3uEMV7P/tX/Cq6eWDrk3SfOBwd2D07XogtqZuorT/ppuSZdBN18WvCTJ3iRPJjmQ5NNNfXmSh5M82zwv6zjm3iRjSQ4luXOQExiGaxZceOXI4gU/p+rUELqRNN90c8Z9EnhPVf0GsB7YmORdwD3AnqpaB+xpXpPkFmAzcCuwEfh85tSdKcWvL/0ub1n0Ags4A5zlmgWvsH7pt1nMT4fdnKR5oJsvCy7g3CnmouZRwCbgjqa+A/gu8O+a+s6qOgkcTjIGbAD+errPOHXqFC+++GJvM+iTN96Y+lttzlcF/+1/fI9rrnmSl06t5GxdxbJFL/JXC15l4ievd/kexcTExGzalTTHnTo1/b/gu7qqpDljfgz4VeC/VNWjSW6qqmMAVXUsyY3N8JXA/+04fLypTevll1/my1/+cjetDMzx48e7Hrvn8cPN1lM9fdbp06f5+te/ztKl/jBT0tRefvnlafd1FdxVdQZYn+R64IEk77zI8PNvKoTJM/RfHJRsBbYCrFmzhk9+8pPdtDIwDz30EIcPH555YB8sWrSIj370o96AI2laX/va16bdd0lXlVTVT5hcEtkIHE+yAqB5PtEMGwdWdxy2CrjgdsKquq+qRqtqdGRk5FLakKR5rZurSkaaM22SXAO8D3gG2A1saYZtAR5stncDm5MsTrIWWAfs7XPfkjRvdbNUsgLY0axzLwB2VdVDSf4a2JXkbuA54IMAVXUgyS7gaeA0sK1ZapEk9UE3V5U8Bdw2Rf1l4L3THLMd2D7r7iRJF/DOSUlqGYNbklrG3w7YePe7383y5csvy2ctWbKEJUuWXJbPkjT3GNyN7dtdkpfUDi6VSFLLGNyS1DIGtyS1jMEtSS1jcEtSyxjcktQyBrcktYzBLUktY3BLUssY3JLUMga3JLWMwS1JLWNwS1LLGNyS1DLdfFnwkiR7kzyZ5ECSTzf1TyV5IckTzeOujmPuTTKW5FCSOwc5AUmab7r5fdwngfdU1WtJFgHfS/I/m32fq6o/7Ryc5BZgM3Ar8Dbg20ne4RcGS1J/zHjGXZNea14uah51kUM2ATur6mRVHQbGgA2z7lSSBHS5xp1kYZIngBPAw1X1aLPrY0meSnJ/kmVNbSXwfMfh401NktQHXQV3VZ2pqvXAKmBDkncCXwDeDqwHjgGfaYZnqrc4v5Bka5J9SfZNTEz00LokzU+XdFVJVf0E+C6wsaqON4F+Fvgiby6HjAOrOw5bBRyd4r3uq6rRqhodGRnppXdJmpe6uapkJMn1zfY1wPuAZ5Ks6Bj2AWB/s70b2JxkcZK1wDpgb1+7lqR5rJurSlYAO5IsZDLod1XVQ0m+nGQ9k8sgR4CPAFTVgSS7gKeB08A2ryiRpP6ZMbir6ingtinqH77IMduB7bNrTZI0Fe+clKSWMbglqWUMbklqGYNbklrG4JakljG4JallDG5JahmDW5JaxuCWpJYxuCWpZQxuSWoZg1uSWsbglqSWMbglqWUMbklqGYNbklrG4JakljG4JallDG5JahmDW5JaxuCWpJYxuCWpZVJVw+6BJBPAz4CXht3LANyA82qbuTo359Uu/6CqRqbacUUEN0CSfVU1Ouw++s15tc9cnZvzmjtcKpGkljG4JallrqTgvm/YDQyI82qfuTo35zVHXDFr3JKk7lxJZ9ySpC4MPbiTbExyKMlYknuG3c+lSnJ/khNJ9nfUlid5OMmzzfOyjn33NnM9lOTO4XQ9sySrk3wnycEkB5J8vKm3em5JliTZm+TJZl6fbuqtntc5SRYm+ZskDzWv58q8jiT5QZInkuxranNibj2pqqE9gIXAD4FfAa4GngRuGWZPPczhnwK/CezvqP0n4J5m+x7gPzbbtzRzXAysbea+cNhzmGZeK4DfbLaXAn/b9N/quQEBrmu2FwGPAu9q+7w65vdvgT8HHpor/y02/R4BbjivNifm1stj2GfcG4CxqvpRVb0B7AQ2DbmnS1JVjwA/Pq+8CdjRbO8A3t9R31lVJ6vqMDDG5J/BFaeqjlXV4832q8BBYCUtn1tNeq15uah5FC2fF0CSVcC/BP5rR7n187qIuTy3ixp2cK8Enu94Pd7U2u6mqjoGkwEI3NjUWznfJDcDtzF5dtr6uTXLCU8AJ4CHq2pOzAv4z8AfAGc7anNhXjD5l+tfJHksydamNlfmdsmuGvLnZ4raXL7MpXXzTXId8A3gE1X1SjLVFCaHTlG7IudWVWeA9UmuBx5I8s6LDG/FvJL8K+BEVT2W5I5uDpmidsXNq8PtVXU0yY3Aw0meucjYts3tkg37jHscWN3xehVwdEi99NPxJCsAmucTTb1V802yiMnQ/kpVfbMpz4m5AVTVT4DvAhtp/7xuB34nyREmlxzfk+S/0/55AVBVR5vnE8ADTC59zIm59WLYwf19YF2StUmuBjYDu4fcUz/sBrY021uABzvqm5MsTrIWWAfsHUJ/M8rkqfWXgINV9dmOXa2eW5KR5kybJNcA7wOeoeXzqqp7q2pVVd3M5P9Hf1lVv0fL5wWQ5NokS89tA78F7GcOzK1nw/7pKHAXk1cs/BD4o2H300P/XwWOAaeY/Jv+buAtwB7g2eZ5ecf4P2rmegj47WH3f5F5vZvJf14+BTzRPO5q+9yAfwT8TTOv/cB/aOqtntd5c7yDN68qaf28mLzq7MnmceBcTsyFufX68M5JSWqZYS+VSJIukcEtSS1jcEtSyxjcktQyBrcktYzBLUktY3BLUssY3JLUMv8fXSkG+8LgDB8AAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "200.0"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test(play=True)"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "collapsed_sections": [],
   "name": "第7章-DQN算法.ipynb",
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
